{% extends "base.html" %} {% block content %}
Back
First we need to import all the needed libraries for this project
We are going to use:
I find it very interesting to see which subjects received most of the mass media's attention each year.
# coding: utf-8
from matplotlib import pyplot as plt
import pandas as pd
import collections
from collections import Counter,defaultdict,OrderedDict,namedtuple
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
from datetime import datetime
import re
import sys
sys.path.insert(1, '/Users/elenikaranikola/Desktop/NewsCleanser')
from settings import years_colors
After installing and importing everything we are going to need, we will read our data from the output.csv file and create the basic functions for combining the words per year and plotting them in word clouds and bar charts.
# Load the article table from output.csv and, in the same step, replace every
# null cell with a single space so no null values remain in the table.
df = pd.read_csv('output.csv').fillna(" ")
def common_year(year, data=None):
    """Return the 100 most frequent words of all articles dated in *year*.

    An article belongs to *year* when the first run of four digits found in
    its ``article_date`` value equals *year*.

    Args:
        year: Year of interest, e.g. ``'2013'``; an int is accepted as well.
        data: Table with ``article_date`` and ``article_body`` columns.
            Defaults to the module-level ``df``.

    Returns:
        A list of up to 100 ``(word, count)`` tuples, most frequent first;
        an empty list when no article matches.
    """
    if data is None:
        data = df
    wanted = str(year)
    year_pattern = re.compile(r'\d\d\d\d')
    bodies = []
    for date, body in zip(data['article_date'], data['article_body']):
        # Rows without a usable date (e.g. nulls replaced by " ") contain no
        # four-digit run; skip them instead of failing on a None match.
        found = year_pattern.search(str(date))
        if found is not None and found.group(0) == wanted:
            bodies.append(body)
    # Join with a space so the last word of one article is not glued to the
    # first word of the next one, which would distort the counts.
    words = " ".join(bodies).split()
    return Counter(words).most_common(100)
def plot_cloud(top, year):
    """Draw a word cloud in which each word is sized by its count.

    Args:
        top: Sequence of ``(word, count)`` pairs, e.g. the result of
            ``common_year``.
        year: Year shown in the figure title (str or int).

    Raises:
        ValueError: Raised by WordCloud when no word is left to draw.
    """
    # Pass the counts straight to WordCloud. Generating from ``str(top)``
    # re-tokenised the printed list, so each word occurred once and the real
    # frequencies never influenced the font sizes.
    # generate_from_frequencies() does no stop-word removal, so apply the
    # default STOPWORDS filter here, as generate() did implicitly.
    frequencies = {
        word: count for word, count in top if word.lower() not in STOPWORDS
    }
    wordcloud = WordCloud(
        width=1600, height=800, background_color='white'
    ).generate_from_frequencies(frequencies)
    plt.figure(figsize=(30, 10), facecolor='white')
    plt.imshow(wordcloud, interpolation="bilinear")
    plt.axis('off')
    plt.title(f'100 Most Common Words in the year {year}', fontsize=100)
    plt.tight_layout(pad=0)
    plt.show()
def plot_barchart(top, year, my_color):
    """Show a bar chart of the first ten ``(word, count)`` entries of *top*.

    Args:
        top: Sequence of ``(word, count)`` pairs; only the first ten are drawn.
        year: Year appended to the chart title; must be a string.
        my_color: Colour passed to ``plt.bar`` for the bars.
    """
    leading = top[0:10]
    words = [entry[0] for entry in leading]
    count = [entry[1] for entry in leading]
    plt.figure(figsize=(15, 5))
    plt.bar(words, count, color=my_color)
    plt.title('Bar chart for 10 most common words in ' + year)
    plt.show()
Once finished with the above, we are going to call our functions and create our visualizations.
# The bar colours come from settings.years_colors; the list below is the
# earlier inline palette, kept for reference.
#my_list = ['brown','saddlebrown','darkorange', 'darkolivegreen','green','mediumturquoise','deepskyblue','royalblue', 'blueviolet','orchid']
# For every year from 2013 to 2020: word cloud first, then the bar chart.
for i in range(2013, 2021):
    year_label = str(i)
    # pop(0) removes the colour from the shared list, so each year gets the
    # next colour and the list is shorter after every iteration.
    my_color = years_colors.pop(0)
    words_combined = common_year(year_label)
    plot_cloud(words_combined, year_label)
    print('')
    plot_barchart(words_combined, year_label, my_color)
    print('')